################################################################################
# CAS Data Engineering Modul 2 - "Crypto Gruppe" #
# Time Series Analysis of Bitcoin Prices with R #
# -> Statistical Tests and ARIMA Experiment <- #
# #
# 09 July 2021 #
# #
################################################################################
# References:
#
# SUMNER, T. 14/11/2019. Forecasting Bitcoin in R.
# Available from: https://rstudio-pubs-static.s3.amazonaws.com/549884_39fa223876e448608b7a7fa79337feba.html
#
# JAQUART, P. DANN, D. WEINHARDT, Ch. Short-term bitcoin market prediction via machine learning
# Available online at www.sciencedirect.com
#
# URAS, N. MARCHESI, L. MARCHESI, M. TONELLI, R. Forecasting Bitcoin closing price series using linear regression and neural networks models
# Uras et al. (2020), PeerJ Comput. Sci., DOI 10.7717/peerj-cs.279
#
#
################################################################################
# Clear the workspace: drop every object from the current environment.
# (Only objects are removed; packages that are already attached stay attached.)
rm(list = ls())
# Load Libraries
library(foreign)
library(psych)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.2 v stringr 1.4.0
## v tidyr 1.1.3 v forcats 0.5.1
## v readr 1.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x ggplot2::%+%() masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## -- Attaching packages ---------------------------------------------- fpp2 2.4 --
## v forecast 8.15 v expsmooth 2.3
## v fma 2.4
## -- Conflicts ------------------------------------------------- fpp2_conflicts --
## x ggplot2::%+%() masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
library(astsa)
##
## Attaching package: 'astsa'
## The following objects are masked from 'package:fma':
##
## chicken, sales
## The following object is masked from 'package:forecast':
##
## gas
## The following object is masked from 'package:fpp2':
##
## oil
## The following object is masked from 'package:psych':
##
## scatter.hist
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(tseries)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:purrr':
##
## some
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:psych':
##
## logit
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
library(e1071)
# Initial configurations
# Register the Georgia family with the Windows graphics device.
# windowsFonts()/windowsFont() only exist in Windows builds of R, so the call
# is guarded to keep the script runnable on macOS and Linux.
if (.Platform$OS.type == "windows") {
  windowsFonts(Georgia = windowsFont("Georgia"))
}
## Shared ggplot2 theme used by every chart in this script:
## white panel with a light grey grid, Georgia text, centred orange title.
my_theme <- theme(
  panel.grid = element_line(color = "#e6e6e6"),
  panel.background = element_rect(fill = "white"),
  plot.title = element_text(hjust = .5, size = 28, colour = "#ffa500"),
  text = element_text(family = "Georgia"),
  axis.text = element_text(size = 10),
  axis.title = element_text(size = 18, family = "Georgia", face = "bold"),
  axis.line = element_line(colour = "#737373", size = 1),
  strip.background = element_rect(colour = "black", fill = "white"),
  strip.text = element_text(face = "bold")
)
# Read Data
# Load the daily Bitcoin price table from CSV (path relative to the
# working directory)
bitcoin <- read_csv(file = "bitcoin_full_daily_prices.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_double(),
## Date = col_date(format = ""),
## sentiment_elon = col_character(),
## sentiment_bitcoin = col_character()
## )
## i Use `spec()` for the full column specifications.
# Show which columns were read
names(bitcoin)
## [1] "Date" "Open" "High"
## [4] "Low" "Close" "WeightedPrice"
## [7] "Volume" "SMA_30" "EMA_40"
## [10] "Altcoin_EMA_40" "DASH" "DOGE"
## [13] "ETC" "ETH" "LTC"
## [16] "SC" "XEM" "XMR"
## [19] "XRP" "ZEC" "CLF"
## [22] "CNYUSDX" "DJI" "EURUSDX"
## [25] "GCF" "GSPC" "IXIC"
## [28] "JPYUSDX" "TSLA" "VIX"
## [31] "XWDTO" "Cost_per_TR" "Num_TR_per_Block"
## [34] "Bu_Be_Spread_MA8" "SMA_05" "SMA_90"
## [37] "EMA_05" "EMA_90" "MACD"
## [40] "Avg_Dir_Mvmt" "RSI" "Awesome_Osc"
## [43] "ROC" "Stoch_RSI" "Ultimate_Osc"
## [46] "True_SI" "Cum_Return" "Log_Return"
## [49] "Number_of_Transactions" "Active_Addresses" "New_Addresses"
## [52] "Hash_Rate" "sentiment_elon" "sentiment_bitcoin"
# Load the daily returns table (the dataset also used in the Python modelling)
btc <- read_csv(file = "bitcoin_full_daily_returns.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_double(),
## Date = col_date(format = ""),
## sentiment_elon = col_character(),
## sentiment_bitcoin = col_character()
## )
## i Use `spec()` for the full column specifications.
# Show which columns were read
names(btc)
## [1] "Date" "WeightedPrice_return" "DASH_return"
## [4] "DOGE_return" "ETC_return" "ETH_return"
## [7] "LTC_return" "SC_return" "XEM_return"
## [10] "XMR_return" "XRP_return" "ZEC_return"
## [13] "CLF_return" "CNYUSDX_return" "DJI_return"
## [16] "EURUSDX_return" "GCF_return" "GSPC_return"
## [19] "IXIC_return" "JPYUSDX_return" "TSLA_return"
## [22] "VIX_return" "XWDTO_return" "Volume"
## [25] "SMA_30" "EMA_40" "Altcoin_EMA_40"
## [28] "Cost_per_TR" "Num_TR_per_Block" "Bu_Be_Spread_MA8"
## [31] "SMA_05" "SMA_90" "EMA_05"
## [34] "EMA_90" "MACD" "Avg_Dir_Mvmt"
## [37] "RSI" "Awesome_Osc" "ROC"
## [40] "Stoch_RSI" "Ultimate_Osc" "True_SI"
## [43] "Cum_Return" "Log_Return" "Number_of_Transactions"
## [46] "Active_Addresses" "New_Addresses" "Hash_Rate"
## [49] "sentiment_elon" "sentiment_bitcoin"
# Create Time series Dataset
# Price series: rows dated after 2017-01-01, sorted by date, reduced to the
# WeightedPrice column and converted to a matrix before wrapping in ts()
bit_ts <- bitcoin %>%
  filter(Date > as.Date("2017-01-01")) %>%
  arrange(Date) %>%
  select(WeightedPrice) %>%
  as.matrix() %>%
  ts()
# Return series: same steps applied to WeightedPrice_return of the returns table
bit_ret_ts <- btc %>%
  filter(Date > as.Date("2017-01-01")) %>%
  arrange(Date) %>%
  select(WeightedPrice_return) %>%
  as.matrix() %>%
  ts()
# Plot BTC Prices (full Dataset)
# Interactive line chart of the weighted price over every row of `bitcoin`
(ggplot(bitcoin, aes(Date, WeightedPrice)) +
  geom_line(col = "#ffa500") +
  labs(title = "Bitcoin Weighted Prices 2014 -2021", x = "") +
  scale_y_continuous(
    breaks = c(0, 5000, 10000, 15000, 30000, 60000),
    labels = c("$0", "$5,000", "$10,000", "$15,000", "$30,000", "$60,000")
  ) +
  my_theme) %>%
  ggplotly()
# Plot BTC Prices after 2017
# Same chart restricted to rows dated after 2017-01-01
(bitcoin %>%
  filter(Date > as.Date("2017-01-01")) %>%
  ggplot(aes(Date, WeightedPrice)) +
  geom_line(col = "#ffa500") +
  labs(title = "Bitcoin Weighted Prices after 2017", x = "") +
  scale_y_continuous(
    breaks = c(0, 5000, 10000, 15000, 30000, 60000),
    labels = c("$0", "$5,000", "$10,000", "$15,000", "$30,000", "$60,000")
  ) +
  my_theme) %>%
  ggplotly()
# Correlation plots for BTC Prices & its lags
# Points only (no connecting lines); the colour scale is relabelled with years
# and both axes are relabelled with dollar amounts.
gglagplot(bit_ts, do.lines = FALSE) +
  my_theme +
  scale_color_continuous(
    low = "#b37400",
    high = "#ffc04d",
    breaks = c(1, 366, 731, 1097, 1463),
    labels = c("2017", "2018", "2019", "2020", "2021")
  ) +
  scale_y_continuous(
    breaks = c(0, 5000, 10000, 15000, 30000, 60000),
    labels = c("$0", "$5,000", "$10,000", "$15,000", "$30,000", "$60,000")
  ) +
  scale_x_continuous(
    breaks = c(5000, 10000, 15000, 30000, 60000),
    labels = c("$5,000", "$10,000", "$15,000", "$30,000", "$60,000")
  )

# ACF and PACF of the raw price series, up to 200 lags
ggAcf(bit_ts, lag.max = 200) +
  my_theme +
  labs(title = "ACF", y = "Correlation")

ggPacf(bit_ts, lag.max = 200) +
  my_theme +
  labs(title = "PACF", y = "")

# ACF and PACF of the first-differenced price series, up to 200 lags
# (title typo "Differnce" corrected so both charts are labelled consistently)
ggAcf(diff(bit_ts), lag.max = 200) +
  my_theme +
  labs(title = "ACF with First Difference", y = "Correlation")

ggPacf(diff(bit_ts), lag.max = 200) +
  my_theme +
  labs(title = "PACF with First Difference", y = "")

# ACF and PACF of the daily-return series, up to 200 lags
ggAcf(bit_ret_ts, lag.max = 200) +
  my_theme +
  labs(title = "ACF with Daily Returns", y = "Correlation")

ggPacf(bit_ret_ts, lag.max = 200) +
  my_theme +
  labs(title = "PACF with Daily Returns", y = "")

# Plot First Difference after 2017
# Price rows dated after 2017-01-01 (reused by later sections)
cut_bit_df <- bitcoin %>%
  filter(Date > as.Date("2017-01-01"))
# diff() yields one value fewer than its input, so the first row is dropped
# to line each difference up with the later of its two dates.
(cut_bit_df[-1, ] %>%
  mutate(WeightedPrice = diff(cut_bit_df$WeightedPrice)) %>%
  ggplot(aes(Date, WeightedPrice)) +
  geom_line(col = "#ffa500") +
  my_theme +
  labs(x = "", title = "Bitcoin Differenced By One", y = "Difference")) %>%
  ggplotly()
# Plot Daily Returns after 2017
# Return rows dated after 2017-01-01 (reused by later sections)
cut_bit_ret_df <- btc %>%
  filter(Date > as.Date("2017-01-01"))
# WeightedPrice_return is plotted as stored. The previous version applied
# diff() to it first, which charts the day-to-day CHANGE in returns rather
# than the returns announced by the title and axis label.
# NOTE(review): the stored values look like fractions (e.g. 0.011), not
# percentages - confirm whether the "% Returns" label needs a x100 scale.
ggplotly(
  ggplot(cut_bit_ret_df, aes(Date, WeightedPrice_return)) +
    geom_line(col = "#ffa500") +
    my_theme +
    labs(x = "", title = "Bitcoin Daily Returns", y = "% Returns")
)
# Box-Cox Normalization of Bitcoin Prices
# Print the automatically selected lambda for the post-2017 price series
BoxCox.lambda(bit_ts)
## [1] -0.03727455
# Interactive chart of the Box-Cox transformed price; lambda is re-estimated
# from the data-frame column that is being transformed
(cut_bit_df %>%
  mutate(
    WeightedPrice = BoxCox(
      cut_bit_df$WeightedPrice,
      lambda = BoxCox.lambda(cut_bit_df$WeightedPrice)
    )
  ) %>%
  ggplot(aes(Date, WeightedPrice)) +
  geom_line(col = "#ffa500") +
  my_theme +
  labs(x = "", title = "Bitcoin Box-Cox transformed",
       y = "BTC Price Transformed")) %>%
  ggplotly()
# Plot First Difference, Transformed First Difference & Daily Returns
# Three static charts for side-by-side comparison of the candidate series.
## Original Price: first difference of the raw weighted price
cut_bit_df[-1, ] %>%
  mutate(WeightedPrice = diff(cut_bit_df$WeightedPrice)) %>%
  ggplot(aes(Date, WeightedPrice)) +
  geom_line(col = "#650fba") +
  my_theme +
  labs(x = "", title = "Original BTC Price", y = "Difference")

## Transformed Price: first difference of the Box-Cox transformed price
cut_bit_df[-1, ] %>%
  mutate(
    WeightedPrice = diff(
      BoxCox(
        cut_bit_df$WeightedPrice,
        lambda = BoxCox.lambda(cut_bit_df$WeightedPrice)
      )
    )
  ) %>%
  ggplot(aes(Date, WeightedPrice)) +
  geom_line(col = "#650fba") +
  my_theme +
  labs(x = "", title = "Transformed BTC Price", y = "")

## Daily Returns of Price: stored return column, first row left out
cut_bit_ret_df[-1, ] %>%
  ggplot(aes(Date, WeightedPrice_return)) +
  geom_line(col = "#650fba") +
  my_theme +
  labs(x = "", title = "Daily Returns of BTC Price", y = "")

# ACF and PACF of the differenced Box-Cox transformed prices
# Transformed series (lambda estimated from bit_ts); reused by later sections
bit_ts_tran <- BoxCox(bit_ts, lambda = BoxCox.lambda(bit_ts))
ggAcf(diff(bit_ts_tran), lag.max = 200) +
  my_theme +
  labs(title = "ACF", y = "Correlation")

ggPacf(diff(bit_ts_tran), lag.max = 200) +
  my_theme +
  labs(title = "PACF", y = "")

# Test for Stationarity
# Augmented Dickey-Fuller test on each candidate series. As the recorded
# output states, the alternative hypothesis is "stationary", so a small
# p-value is evidence FOR stationarity.
# Daily Prices Dataset
# Recorded result: p = 0.711 -> stationarity cannot be concluded.
adf.test(bit_ts) # p-value < 0.05 indicates the TS is stationary
##
## Augmented Dickey-Fuller Test
##
## data: bit_ts
## Dickey-Fuller = -1.6865, Lag order = 11, p-value = 0.711
## alternative hypothesis: stationary
# Box-Cox Transformed Dataset
# Recorded result: p = 0.5076 -> stationarity cannot be concluded either.
adf.test(bit_ts_tran) # p-value < 0.05 indicates the TS is stationary
##
## Augmented Dickey-Fuller Test
##
## data: bit_ts_tran
## Dickey-Fuller = -2.1671, Lag order = 11, p-value = 0.5076
## alternative hypothesis: stationary
# Daily Returns Dataset
# Recorded result: printed p = 0.01 (the warning notes the true p-value is
# smaller) -> the return series is treated as stationary.
adf.test(bit_ret_ts) # p-value < 0.05 indicates the TS is stationary
## Warning in adf.test(bit_ret_ts): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: bit_ret_ts
## Dickey-Fuller = -10.475, Lag order = 11, p-value = 0.01
## alternative hypothesis: stationary
# Test for Homoskedasticity
# Original Dataset
# Regress the weighted price on all 46 numeric features of the full dataset
lmMod_orig <- lm(
  WeightedPrice ~ Volume + SMA_30 + EMA_40 + Altcoin_EMA_40 + DASH + DOGE +
    ETC + ETH + LTC + SC + XEM + XMR + XRP + ZEC + CLF + CNYUSDX + DJI +
    EURUSDX + GCF + GSPC + IXIC + JPYUSDX + TSLA + VIX + XWDTO +
    Cost_per_TR + Num_TR_per_Block + Bu_Be_Spread_MA8 + SMA_05 + SMA_90 +
    EMA_05 + EMA_90 + MACD + Avg_Dir_Mvmt + RSI + Awesome_Osc + ROC +
    Stoch_RSI + Ultimate_Osc + True_SI + Cum_Return + Log_Return +
    Number_of_Transactions + Active_Addresses + New_Addresses + Hash_Rate,
  data = bitcoin
)
# Show the four standard lm diagnostic plots on one 2x2 page
# (the 2x2 layout stays active for later base-graphics plots)
par(mfrow = c(2, 2))
plot(lmMod_orig)
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

# Breusch Pagan Test
lmtest::bptest(lmMod_orig)
##
## studentized Breusch-Pagan test
##
## data: lmMod_orig
## BP = 975.99, df = 46, p-value < 2.2e-16
# NCV Test (score test for non-constant variance against the fitted values)
car::ncvTest(lmMod_orig)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 12287.19, Df = 1, p = < 2.22e-16
# Transformed Dataset (Daily returns)
# Regress the daily BTC return on the return/feature columns of `btc`
lmMod_ret <- lm(
  WeightedPrice_return ~ DASH_return + DOGE_return + ETC_return +
    ETH_return + LTC_return + SC_return + XEM_return + XMR_return +
    XRP_return + ZEC_return + CLF_return + CNYUSDX_return + DJI_return +
    EURUSDX_return + GCF_return + GSPC_return + IXIC_return +
    JPYUSDX_return + TSLA_return + VIX_return + XWDTO_return + Volume +
    SMA_30 + EMA_40 + Altcoin_EMA_40 + Cost_per_TR + Num_TR_per_Block +
    Bu_Be_Spread_MA8 + SMA_05 + SMA_90 + EMA_05 + EMA_90 + MACD +
    Avg_Dir_Mvmt + RSI + Awesome_Osc + ROC + Stoch_RSI + Ultimate_Osc +
    True_SI + Cum_Return + Log_Return + Number_of_Transactions +
    Active_Addresses + New_Addresses + Hash_Rate,
  data = btc
)
# Diagnostic plots on a 2x2 page
par(mfrow = c(2, 2))
plot(lmMod_ret)
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

# Breusch Pagan Test
lmtest::bptest(lmMod_ret)
##
## studentized Breusch-Pagan test
##
## data: lmMod_ret
## BP = 488.71, df = 46, p-value < 2.2e-16
# NCV Test (score test for non-constant variance against the fitted values)
car::ncvTest(lmMod_ret)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 419.7224, Df = 1, p = < 2.22e-16
# Transform Dataset with Box-Cox and check the heteroskedasticity again
# Estimate the Box-Cox transformation from the full weighted-price column
distBCMod <- caret::BoxCoxTrans(bitcoin$WeightedPrice)
print(distBCMod)
## Box-Cox Transformation
##
## 2655 data points used to estimate Lambda
##
## Input data summary:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 203.3 472.8 3765.1 7048.8 8851.4 63170.1
##
## Largest/Smallest: 311
## Sample Skewness: 3.07
##
## Estimated Lambda: 0
## With fudge factor, Lambda = 0 will be used for transformations
# Apply the transformation and append the result as column `dist_new`
bitcoin <- cbind(
  bitcoin,
  dist_new = predict(distBCMod, bitcoin$WeightedPrice)
)
head(bitcoin)
## Date Open High Low Close WeightedPrice Volume
## 1 2014-02-26 530.8922 612.2200 529.3997 581.3204 572.4043 87282.479
## 2 2014-02-27 585.5042 599.6675 565.1850 578.7925 578.9220 27925.863
## 3 2014-02-28 578.0178 585.7325 542.8936 549.0490 563.2594 38993.570
## 4 2014-03-01 547.9225 576.7250 534.2375 562.1650 556.7110 21900.142
## 5 2014-03-02 561.7647 568.8898 551.1686 562.0330 560.7188 9532.377
## 6 2014-03-03 565.1014 699.7575 561.4364 667.0159 619.0314 115915.276
## SMA_30 EMA_40 Altcoin_EMA_40 DASH DOGE ETC ETH
## 1 682.5656 677.7405 9.100173 0.6414306 0.0010990163 0.8856109 10.07716
## 2 675.6139 672.9201 8.901130 0.6356737 0.0011578439 0.8856109 10.07716
## 3 667.7523 667.5708 8.698592 0.5244001 0.0010701929 0.8856109 10.07716
## 4 659.7758 662.1630 8.506256 0.8654350 0.0010410495 0.8856109 10.07716
## 5 651.6469 657.2145 8.324678 0.9219676 0.0009868652 0.8856109 10.07716
## 6 644.9606 655.3519 8.155427 1.0897676 0.0010275921 0.8856109 10.07716
## LTC SC XEM XMR XRP ZEC CLF
## 1 14.64462 0.0003029306 0.003972578 6.724992 0.007694752 277.4195 102.59
## 2 14.42255 0.0003029306 0.003972578 6.724992 0.007694752 277.4195 102.40
## 3 13.72179 0.0003029306 0.003972578 6.724992 0.007694752 277.4195 102.59
## 4 13.40066 0.0003029306 0.003972578 6.724992 0.007694752 277.4195 102.59
## 5 13.42876 0.0003029306 0.003972578 6.724992 0.007694752 277.4195 102.59
## 6 13.47429 0.0003029306 0.003972578 6.724992 0.007694752 277.4195 104.92
## CNYUSDX DJI EURUSDX GCF GSPC IXIC JPYUSDX TSLA VIX
## 1 0.1635430 16198.41 1.374608 1328.2 1845.16 4292.06 0.009789525 50.600 14.35
## 2 0.1635377 16272.65 1.368195 1331.6 1854.29 4318.93 0.009775266 50.508 14.04
## 3 0.1634414 16321.71 1.370746 1321.4 1859.45 4308.12 0.009793360 48.962 14.00
## 4 0.1634414 16321.71 1.370746 1321.4 1859.45 4308.12 0.009793360 48.962 14.00
## 5 0.1634414 16321.71 1.370746 1321.4 1859.45 4308.12 0.009793360 48.962 14.00
## 6 0.1629965 16168.03 1.377695 1350.1 1845.73 4277.30 0.009871181 50.112 16.00
## XWDTO Cost_per_TR Num_TR_per_Block Bu_Be_Spread_MA8 SMA_05 SMA_90
## 1 30.02990 40.68028 395.0385 0.1143584 563.3494 613.6189
## 2 30.19822 38.02965 460.0000 0.1152040 562.1636 613.6189
## 3 30.02990 33.50832 442.0000 0.1143583 551.7505 613.6189
## 4 30.02990 33.50832 442.0000 0.1143583 551.9148 613.6189
## 5 30.02990 33.50832 442.0000 0.1143583 566.4031 613.6189
## 6 29.70214 27.54891 373.0000 0.1118211 575.7285 613.6189
## EMA_05 EMA_90 MACD Avg_Dir_Mvmt RSI Awesome_Osc ROC
## 1 559.4679 751.0339 -53.84000 75.87720 38.58185 -132.17401 -5.829356
## 2 565.9526 747.2512 -51.12634 74.32864 38.26849 -126.10740 -7.487245
## 3 565.0548 743.2074 -50.79031 73.04248 34.69747 -129.37471 -11.904074
## 4 562.2736 739.1086 -48.90195 71.90756 37.46850 -122.92658 -9.185845
## 5 561.7553 735.1879 -46.87571 70.85371 37.45128 -99.55943 2.320085
## 6 580.8473 732.6350 -36.37930 67.06642 55.12140 -83.06932 16.783975
## Stoch_RSI Ultimate_Osc True_SI Cum_Return Log_Return
## 1 1.0000000 54.11417 -38.66634 -30.25103 9.05408162
## 2 0.9818706 58.71632 -36.37036 -30.55432 -0.43578758
## 3 0.7752709 53.57958 -35.54387 -34.12306 -5.27563417
## 4 0.9355880 52.17020 -34.01388 -32.54936 2.36076239
## 5 0.9345913 54.50800 -32.80567 -32.56520 -0.02348741
## 6 1.0000000 63.02288 -25.11566 -19.96896 17.12533192
## Number_of_Transactions Active_Addresses New_Addresses Hash_Rate
## 1 73176 201181 110785 28675.77
## 2 70859 185098 95229 29478.51
## 3 70290 196525 92184 31620.30
## 4 63402 181719 100239 27215.37
## 5 56635 159491 80905 26872.27
## 6 81264 237371 123677 28209.56
## sentiment_elon sentiment_bitcoin dist_new
## 1 neutral positive 6.349846
## 2 neutral positive 6.361168
## 3 neutral positive 6.333740
## 4 neutral positive 6.322046
## 5 neutral positive 6.329220
## 6 neutral positive 6.428156
# Original Dataset BoxCox Transformed
# Same predictors as lmMod_orig, but with the Box-Cox transformed price
# (`dist_new`) as the response
lmMod_bc <- lm(
  dist_new ~ Volume + SMA_30 + EMA_40 + Altcoin_EMA_40 + DASH + DOGE +
    ETC + ETH + LTC + SC + XEM + XMR + XRP + ZEC + CLF + CNYUSDX + DJI +
    EURUSDX + GCF + GSPC + IXIC + JPYUSDX + TSLA + VIX + XWDTO +
    Cost_per_TR + Num_TR_per_Block + Bu_Be_Spread_MA8 + SMA_05 + SMA_90 +
    EMA_05 + EMA_90 + MACD + Avg_Dir_Mvmt + RSI + Awesome_Osc + ROC +
    Stoch_RSI + Ultimate_Osc + True_SI + Cum_Return + Log_Return +
    Number_of_Transactions + Active_Addresses + New_Addresses + Hash_Rate,
  data = bitcoin
)
# Breusch-Pagan test, NCV score test and diagnostic plots for this fit
lmtest::bptest(lmMod_bc)
##
## studentized Breusch-Pagan test
##
## data: lmMod_bc
## BP = 758.13, df = 46, p-value < 2.2e-16
car::ncvTest(lmMod_bc)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 122.7901, Df = 1, p = < 2.22e-16
plot(lmMod_bc)



## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

# Transform 2017+ Dataset with Box-Cox and check the heteroskedasticity again
# Estimate the Box-Cox transformation from the post-2017 weighted prices only
distBCMod_2017p <- caret::BoxCoxTrans(cut_bit_df$WeightedPrice)
print(distBCMod_2017p)
## Box-Cox Transformation
##
## 1614 data points used to estimate Lambda
##
## Input data summary:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 800.5 4518.7 7955.4 11310.2 10598.7 63170.1
##
## Largest/Smallest: 78.9
## Sample Skewness: 2.54
##
## Estimated Lambda: 0
## With fudge factor, Lambda = 0 will be used for transformations
# Apply the transformation and append the result as column `dist_new_2017p`
cut_bit_df <- cbind(
  cut_bit_df,
  dist_new_2017p = predict(distBCMod_2017p, cut_bit_df$WeightedPrice)
)
head(cut_bit_df)
## Date Open High Low Close WeightedPrice Volume
## 1 2017-01-02 1000.834 1107.8520 989.0440 1023.5162 1034.6061 12317.03
## 2 2017-01-03 1015.385 1043.6383 1008.9160 1040.3600 1028.5963 12102.70
## 3 2017-01-04 1043.216 1160.4240 1033.1701 1142.4498 1109.4000 30923.99
## 4 2017-01-05 1137.100 1157.9446 904.4820 1015.5040 1028.4806 54107.88
## 5 2017-01-06 1004.590 1041.9672 888.2768 924.3226 941.5943 39906.16
## 6 2017-01-07 914.762 939.9091 824.6119 917.5961 884.8570 30801.34
## SMA_30 EMA_40 Altcoin_EMA_40 DASH DOGE ETC ETH
## 1 845.3696 844.5064 15.60276 11.90462 0.0002172673 1.435816 8.381841
## 2 854.0453 853.4864 15.26059 12.70047 0.0002160052 1.503736 9.343522
## 3 865.7015 865.9700 14.98043 15.76614 0.0002218800 1.650521 10.775181
## 4 874.5525 873.8973 14.68696 14.59271 0.0002159809 1.620495 10.274089
## 5 880.2861 877.1996 14.38358 13.29806 0.0002165667 1.534667 10.313075
## 6 883.9599 877.5732 14.07224 12.16846 0.0002212143 1.411719 9.964207
## LTC SC XEM XMR XRP ZEC CLF
## 1 4.672416 0.0002379594 0.003424546 15.59237 0.006404212 50.19510 52.33
## 2 4.579383 0.0002365772 0.003476656 16.16504 0.006408155 49.69577 52.33
## 3 4.620385 0.0002662560 0.003505704 17.40406 0.006645306 53.87401 53.26
## 4 4.450503 0.0002674050 0.003373416 16.45925 0.006006327 50.63759 53.76
## 5 4.095577 0.0002542305 0.003361492 14.93277 0.006449921 48.43582 53.99
## 6 3.908838 0.0002654571 0.003318214 12.92683 0.006406365 46.92434 53.99
## CNYUSDX DJI EURUSDX GCF GSPC IXIC JPYUSDX TSLA VIX
## 1 0.1440134 19881.76 1.052698 1160.4 2257.83 5429.08 0.008562011 43.398 12.85
## 2 0.1440092 19881.76 1.046003 1160.4 2257.83 5429.08 0.008511000 43.398 12.85
## 3 0.1436823 19942.16 1.041992 1163.8 2270.75 5477.00 0.008499137 45.398 11.85
## 4 0.1444023 19899.29 1.050089 1179.7 2269.00 5487.94 0.008538762 45.350 11.67
## 5 0.1451821 19963.80 1.060592 1171.9 2276.98 5521.06 0.008675661 45.802 11.32
## 6 0.1451821 19963.80 1.060592 1171.9 2276.98 5521.06 0.008675661 45.802 11.32
## XWDTO Cost_per_TR Num_TR_per_Block Bu_Be_Spread_MA8 SMA_05 SMA_90
## 1 40.29515 7.350606 1829.881 0.1836159 981.8926 746.8906
## 2 40.29515 7.410000 1839.410 0.1856546 993.8454 751.1161
## 3 40.35140 7.300000 2143.920 0.1876934 1024.6242 756.4615
## 4 40.27640 6.913398 2060.721 0.1897321 1038.3718 761.0021
## 5 40.31391 5.470000 2151.580 0.1891224 1028.5355 764.5755
## 6 40.31391 5.470000 2151.580 0.1891224 998.5857 767.5439
## EMA_05 EMA_90 MACD Avg_Dir_Mvmt RSI Awesome_Osc ROC
## 1 986.7365 769.8161 58.98192 62.13294 83.87084 148.7609 10.184276
## 2 1000.6898 775.5036 62.40263 63.33110 85.06319 152.1394 16.116281
## 3 1036.9265 782.8420 72.51545 64.91275 89.92477 170.4634 27.171635
## 4 1034.1112 788.2407 69.48548 62.80727 62.62813 177.2781 12.394167
## 5 1003.2722 791.6111 59.04598 60.51073 50.71916 167.9914 -1.820025
## 6 963.8005 793.6604 49.65741 57.16568 49.96436 131.3790 -5.868097
## Stoch_RSI Ultimate_Osc True_SI Cum_Return Log_Return Number_of_Transactions
## 1 0.6519728 62.30141 60.70143 22.80527 3.1831595 290951
## 2 0.6588502 62.12566 62.42697 24.82625 1.6322853 301664
## 3 0.9919971 65.90169 66.36453 37.07537 9.3608097 328642
## 4 0.0000000 59.10028 55.77444 21.84394 -11.7789863 288501
## 5 0.0000000 52.07252 41.69816 10.90365 -9.4079175 346405
## 6 0.0000000 55.31741 32.33093 10.09658 -0.7303834 282060
## Active_Addresses New_Addresses Hash_Rate sentiment_elon sentiment_bitcoin
## 1 706004 350945 2514432 neutral positive
## 2 656631 363271 2590800 neutral positive
## 3 699023 394821 2490282 neutral positive
## 4 653187 352053 2204094 neutral positive
## 5 719666 400985 2579718 positive positive
## 6 649279 333774 2202326 positive positive
## dist_new_2017p
## 1 6.941776
## 2 6.935950
## 3 7.011575
## 4 6.935838
## 5 6.847574
## 6 6.785426
# 2017+ Dataset BoxCox Transformed
# Same predictors as before, fitted on the post-2017 rows with the Box-Cox
# transformed price (`dist_new_2017p`) as the response
lmMod_bc_2017p <- lm(
  dist_new_2017p ~ Volume + SMA_30 + EMA_40 + Altcoin_EMA_40 + DASH + DOGE +
    ETC + ETH + LTC + SC + XEM + XMR + XRP + ZEC + CLF + CNYUSDX + DJI +
    EURUSDX + GCF + GSPC + IXIC + JPYUSDX + TSLA + VIX + XWDTO +
    Cost_per_TR + Num_TR_per_Block + Bu_Be_Spread_MA8 + SMA_05 + SMA_90 +
    EMA_05 + EMA_90 + MACD + Avg_Dir_Mvmt + RSI + Awesome_Osc + ROC +
    Stoch_RSI + Ultimate_Osc + True_SI + Cum_Return + Log_Return +
    Number_of_Transactions + Active_Addresses + New_Addresses + Hash_Rate,
  data = cut_bit_df
)
# Breusch-Pagan test, NCV score test and diagnostic plots for this fit
lmtest::bptest(lmMod_bc_2017p)
##
## studentized Breusch-Pagan test
##
## data: lmMod_bc_2017p
## BP = 309.24, df = 46, p-value < 2.2e-16
car::ncvTest(lmMod_bc_2017p)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 29.24724, Df = 1, p = 6.3707e-08
plot(lmMod_bc_2017p)




# Transform Returns Dataset with Box-Cox and check the heteroskedasticity again
# Attempt a Box-Cox fit on the return column of the full `btc` table (no date
# filter is applied here, despite the `2017p` in the object names).
# Per the recorded output below, lambda could not be estimated and no
# transformation is applied.
distBCMod_2017p_ret <- caret::BoxCoxTrans(btc$WeightedPrice_return)
print(distBCMod_2017p_ret)
## Box-Cox Transformation
##
## 2654 data points used to estimate Lambda
##
## Input data summary:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.227187 -0.011616 0.001398 0.002152 0.016413 0.412763
##
## Lambda could not be estimated; no transformation is applied
# Append the (untransformed, see above) prediction as `dist_new_2017p_ret`
btc <- cbind(
  btc,
  dist_new_2017p_ret = predict(distBCMod_2017p_ret, btc$WeightedPrice_return)
)
head(btc)
## Date WeightedPrice_return DASH_return DOGE_return ETC_return
## 1 2014-02-27 0.011386392 -0.008975094 0.05352749 0
## 2 2014-02-28 -0.027054696 -0.175048216 -0.07570196 0
## 3 2014-03-01 -0.011625969 0.650333348 -0.02723187 0
## 4 2014-03-02 0.007199204 0.065322674 -0.05204781 0
## 5 2014-03-03 0.103996044 0.182002114 0.04126900 0
## 6 2014-03-04 0.088578535 0.032654424 0.01644381 0
## ETH_return LTC_return SC_return XEM_return XMR_return XRP_return ZEC_return
## 1 0 -0.015164062 0 0 0 0 0
## 2 0 -0.048587554 0 0 0 0 0
## 3 0 -0.023403005 0 0 0 0 0
## 4 0 0.002096915 0 0 0 0 0
## 5 0 0.003390253 0 0 0 0 0
## 6 0 0.249238757 0 0 0 0 0
## CLF_return CNYUSDX_return DJI_return EURUSDX_return GCF_return
## 1 -0.001851982 -3.261895e-05 0.004583180 -0.004665577 0.002559874
## 2 0.001855418 -5.884376e-04 0.003014848 0.001864209 -0.007659921
## 3 0.000000000 0.000000e+00 0.000000000 0.000000000 0.000000000
## 4 0.000000000 0.000000e+00 0.000000000 0.000000000 0.000000000
## 5 0.022711784 -2.722010e-03 -0.009415661 0.005069901 0.021719351
## 6 -0.015154369 -1.955477e-04 0.014092662 -0.002898517 -0.009110382
## GSPC_return IXIC_return JPYUSDX_return TSLA_return VIX_return
## 1 0.004948083 0.006260424 -0.001456506 -0.001818163 -0.02160282
## 2 0.002782689 -0.002502948 0.001850968 -0.030608966 -0.00284900
## 3 0.000000000 0.000000000 0.000000000 0.000000000 0.00000000
## 4 0.000000000 0.000000000 0.000000000 0.000000000 0.00000000
## 5 -0.007378510 -0.007154005 0.007946333 0.023487555 0.14285714
## 6 0.015267701 0.017457371 -0.001055092 0.017081717 -0.11874998
## XWDTO_return Volume SMA_30 EMA_40 Altcoin_EMA_40 Cost_per_TR
## 1 0.00560488 27925.863 675.6139 672.9201 8.901130 38.02965
## 2 -0.00557364 38993.570 667.7523 667.5708 8.698592 33.50832
## 3 0.00000000 21900.142 659.7758 662.1630 8.506256 33.50832
## 4 0.00000000 9532.377 651.6469 657.2145 8.324678 33.50832
## 5 -0.01091454 115915.276 644.9606 655.3519 8.155427 27.54891
## 6 0.02028046 53101.347 639.9936 656.2550 8.049617 33.87386
## Num_TR_per_Block Bu_Be_Spread_MA8 SMA_05 SMA_90 EMA_05 EMA_90
## 1 460.0000 0.1152040 562.1636 613.6189 565.9526 747.2512
## 2 442.0000 0.1143583 551.7505 613.6189 565.0548 743.2074
## 3 442.0000 0.1143583 551.9148 613.6189 562.2736 739.1086
## 4 442.0000 0.1143583 566.4031 613.6189 561.7553 735.1879
## 5 373.0000 0.1118211 575.7285 613.6189 580.8473 732.6350
## 6 567.7852 0.1109754 594.7170 613.6189 611.8530 731.3434
## MACD Avg_Dir_Mvmt RSI Awesome_Osc ROC Stoch_RSI Ultimate_Osc
## 1 -51.12634 74.32864 38.26849 -126.10740 -7.487245 0.9818706 58.71632
## 2 -50.79031 73.04248 34.69747 -129.37471 -11.904074 0.7752709 53.57958
## 3 -48.90195 71.90756 37.46850 -122.92658 -9.185845 0.9355880 52.17020
## 4 -46.87571 70.85371 37.45128 -99.55943 2.320085 0.9345913 54.50800
## 5 -36.37930 67.06642 55.12140 -83.06932 16.783975 1.0000000 63.02288
## 6 -27.73094 63.54965 55.13786 -61.48890 10.358037 1.0000000 56.80929
## True_SI Cum_Return Log_Return Number_of_Transactions Active_Addresses
## 1 -36.37036 -30.55432 -0.43578758 70859 185098
## 2 -35.54387 -34.12306 -5.27563417 70290 196525
## 3 -34.01388 -32.54936 2.36076239 63402 181719
## 4 -32.80567 -32.56520 -0.02348741 56635 159491
## 5 -25.11566 -19.96896 17.12533192 81264 237371
## 6 -19.33400 -19.95377 0.01898200 84600 232480
## New_Addresses Hash_Rate sentiment_elon sentiment_bitcoin dist_new_2017p_ret
## 1 95229 29478.51 neutral positive 0.011386392
## 2 92184 31620.30 neutral positive -0.027054696
## 3 100239 27215.37 neutral positive -0.011625969
## 4 80905 26872.27 neutral positive 0.007199204
## 5 123677 28209.56 neutral positive 0.103996044
## 6 132656 28401.79 neutral positive 0.088578535
# Returns Dataset after the Box-Cox step
# Same predictors as lmMod_ret with `dist_new_2017p_ret` as the response.
# The recorded statistics below equal those of lmMod_ret (BP = 488.71),
# consistent with no transformation having been applied to the response.
lmMod_bc_2017p_ret <- lm(
  dist_new_2017p_ret ~ DASH_return + DOGE_return + ETC_return +
    ETH_return + LTC_return + SC_return + XEM_return + XMR_return +
    XRP_return + ZEC_return + CLF_return + CNYUSDX_return + DJI_return +
    EURUSDX_return + GCF_return + GSPC_return + IXIC_return +
    JPYUSDX_return + TSLA_return + VIX_return + XWDTO_return + Volume +
    SMA_30 + EMA_40 + Altcoin_EMA_40 + Cost_per_TR + Num_TR_per_Block +
    Bu_Be_Spread_MA8 + SMA_05 + SMA_90 + EMA_05 + EMA_90 + MACD +
    Avg_Dir_Mvmt + RSI + Awesome_Osc + ROC + Stoch_RSI + Ultimate_Osc +
    True_SI + Cum_Return + Log_Return + Number_of_Transactions +
    Active_Addresses + New_Addresses + Hash_Rate,
  data = btc
)
# Breusch-Pagan test, NCV score test and diagnostic plots for this fit
lmtest::bptest(lmMod_bc_2017p_ret)
##
## studentized Breusch-Pagan test
##
## data: lmMod_bc_2017p_ret
## BP = 488.71, df = 46, p-value < 2.2e-16
car::ncvTest(lmMod_bc_2017p_ret)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 419.7224, Df = 1, p = < 2.22e-16
plot(lmMod_bc_2017p_ret)



## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

# Try an Autoarima with transformed data
# Let auto.arima() pick the model for the Box-Cox transformed price series,
# then run the residual diagnostics (plots plus Ljung-Box test).
# Recorded result: ARIMA(3,1,1) with drift, Ljung-Box p = 0.1507.
arima_tr <- auto.arima(bit_ts_tran)
checkresiduals(arima_tr)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(3,1,1) with drift
## Q* = 8.1023, df = 5, p-value = 0.1507
##
## Model df: 5. Total lags used: 10
# Try an Autoarima with daily returns data
# Same procedure on the daily-return series.
# Recorded result (shown below the call): ARIMA(1,0,2) with non-zero mean.
arima_ret <- auto.arima(bit_ret_ts)
checkresiduals(arima_ret)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,2) with non-zero mean
## Q* = 10.711, df = 6, p-value = 0.09773
##
## Model df: 4. Total lags used: 10
# Taking only data from 2020 onward
# The filter keeps every row dated 2020-01-01 or later (no upper bound)
cut2_bit_df <- cut_bit_df %>%
  filter(Date >= ymd("2020-01-01"))
# Interactive chart of the Box-Cox transformed price for this window
(cut2_bit_df %>%
  mutate(
    WeightedPrice = BoxCox(
      cut2_bit_df$WeightedPrice,
      lambda = BoxCox.lambda(cut2_bit_df$WeightedPrice)
    )
  ) %>%
  ggplot(aes(Date, WeightedPrice)) +
  geom_line(col = "#ffa500") +
  labs(title = "Bitcoin", x = "", y = "Price (Transformed)") +
  my_theme) %>%
  ggplotly()
# Interactive chart of the first difference of the transformed price
# (first row dropped so each difference has a date)
(cut2_bit_df[-1, ] %>%
  mutate(
    WeightedPrice = diff(
      BoxCox(
        cut2_bit_df$WeightedPrice,
        lambda = BoxCox.lambda(cut2_bit_df$WeightedPrice)
      )
    )
  ) %>%
  ggplot(aes(Date, WeightedPrice)) +
  geom_line(col = "#ffa500") +
  my_theme +
  labs(x = "", title = "Transformed Price", y = "Difference")) %>%
  ggplotly()
# ACF, PACF for data from 2020 onward
# Weighted prices dated 2020-01-01 or later, oldest first
bit_ts2 <- bitcoin %>%
  filter(Date >= as.Date("2020-01-01")) %>%
  arrange(Date) %>%
  select(WeightedPrice) %>%
  as.matrix() %>%
  ts()
# Box-Cox transform with lambda estimated from this window
bit_ts_tran2 <- BoxCox(bit_ts2, lambda = BoxCox.lambda(bit_ts2))
ggAcf(diff(bit_ts_tran2), lag.max = 200) +
  my_theme +
  labs(title = "ACF", y = "Correlation")

ggPacf(diff(bit_ts_tran2), lag.max = 200) +
  my_theme +
  labs(title = "PACF", y = "")

# Autoarima for data from 2020 onward, followed by residual diagnostics
arima_2020_tr <- auto.arima(bit_ts_tran2)
checkresiduals(arima_2020_tr)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1) with drift
## Q* = 14.894, df = 8, p-value = 0.06123
##
## Model df: 2. Total lags used: 10
# Taking only 2021
# Rows dated 2021-01-01 or later
cut3_bit_df <- cut_bit_df %>%
  filter(Date >= ymd("2021-01-01"))
# Interactive chart of the Box-Cox transformed price for this window
(cut3_bit_df %>%
  mutate(
    WeightedPrice = BoxCox(
      cut3_bit_df$WeightedPrice,
      lambda = BoxCox.lambda(cut3_bit_df$WeightedPrice)
    )
  ) %>%
  ggplot(aes(Date, WeightedPrice)) +
  geom_line(col = "#ffa500") +
  labs(title = "Bitcoin", x = "", y = "Price (Transformed)") +
  my_theme) %>%
  ggplotly()
# Interactive chart of the first difference of the transformed price
# (first row dropped so each difference has a date)
(cut3_bit_df[-1, ] %>%
  mutate(
    WeightedPrice = diff(
      BoxCox(
        cut3_bit_df$WeightedPrice,
        lambda = BoxCox.lambda(cut3_bit_df$WeightedPrice)
      )
    )
  ) %>%
  ggplot(aes(Date, WeightedPrice)) +
  geom_line(col = "#ffa500") +
  my_theme +
  labs(x = "", title = "Transformed Price", y = "Difference")) %>%
  ggplotly()
# ACF and PACF for the data from 2021-01-01 onwards
# Build a one-column time series of the weighted price, ordered by date.
bit_ts3 <- bitcoin %>%
  filter(Date >= as.Date("2021-01-01")) %>%
  arrange(Date) %>%
  select(WeightedPrice) %>%
  as.matrix() %>%
  ts()

# Box-Cox transform with a lambda estimated from THIS series. The lambda was
# previously taken from bit_ts2 (the series starting in 2020), a copy-paste
# slip that transformed the 2021 data with another sample's parameter.
# NOTE: recorded outputs in this file that depend on bit_ts_tran3 were produced
# with the old lambda and should be regenerated.
bit_ts_tran3 <- BoxCox(bit_ts3, lambda = BoxCox.lambda(bit_ts3))

# Autocorrelation of the differenced, transformed series.
ggAcf(diff(bit_ts_tran3), lag.max = 200) +
  my_theme +
  labs(title = "ACF", y = "Correlation")

# Partial autocorrelation of the same series.
ggPacf(diff(bit_ts_tran3), lag.max = 200) +
  my_theme +
  labs(title = "PACF", y = "")

# Automatic ARIMA order selection on the transformed series, followed by
# residual diagnostics of the selected model.
arima_2021_tr <- auto.arima(bit_ts_tran3)
checkresiduals(arima_2021_tr)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0)
## Q* = 13.409, df = 10, p-value = 0.2017
##
## Model df: 0. Total lags used: 10
# ARIMA Model Fits
## Data after 2017
# One-column time series of the weighted price from 2017-01-01 onwards.
bit_ts_past_2017 <- filter(bitcoin, Date >= as.Date("2017-01-01")) %>%
  arrange(Date) %>%
  select(WeightedPrice) %>%
  as.matrix() %>%
  ts()

# Fit ARIMA(0,1,0) with drift (a random walk with drift) to the Box-Cox
# transformed series and check whether the residuals are uncorrelated.
bit_ts_past_2017 %>%
  BoxCox(lambda = BoxCox.lambda(bit_ts_past_2017)) %>%
  Arima(order = c(0, 1, 0), include.drift = TRUE) %>%
  checkresiduals()

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0) with drift
## Q* = 81.859, df = 9, p-value = 6.894e-14
##
## Model df: 1. Total lags used: 10
# Coefficient and accuracy summary of the same model specification fitted to
# bit_ts_tran (created outside this section).
summary(Arima(bit_ts_tran, order = c(0, 1, 0), include.drift = TRUE))
## Series: bit_ts_tran
## ARIMA(0,1,0) with drift
##
## Coefficients:
## drift
## 0.0016
## s.e. 0.0006
##
## sigma^2 estimated as 0.000653: log likelihood=3626.59
## AIC=-7249.18 AICc=-7249.17 BIC=-7238.41
##
## Training set error measures:
## ME RMSE MAE MPE MAPE
## Training set 3.788572e-06 0.02553818 0.01755831 -1.606423e-05 0.2328859
## MASE ACF1
## Training set 0.9947251 0.2064681
## Daily Return Data after 2017
# One-column time series of the daily returns from 2017-01-01 onwards.
bit_ts_ret_past_2017 <- filter(btc, Date >= as.Date("2017-01-01")) %>%
  arrange(Date) %>%
  select(WeightedPrice_return) %>%
  as.matrix() %>%
  ts()

# Fit ARIMA(0,1,0) with drift to the return series and check the residuals.
# NOTE(review): order c(0,1,0) differences the returns once more; if the
# intent is to test prices for a random walk, the returns may already be the
# differenced series -- confirm the intended order.
bit_ts_ret_past_2017 %>%
  Arima(order = c(0, 1, 0), include.drift = TRUE) %>%
  checkresiduals()

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0) with drift
## Q* = 262.58, df = 9, p-value < 2.2e-16
##
## Model df: 1. Total lags used: 10
# Coefficient and accuracy summary of the same model specification fitted to
# bit_ret_ts (created outside this section).
summary(Arima(bit_ret_ts, order = c(0, 1, 0), include.drift = TRUE))
## Series: bit_ret_ts
## ARIMA(0,1,0) with drift
##
## Coefficients:
## drift
## 0.0000
## s.e. 0.0011
##
## sigma^2 estimated as 0.002042: log likelihood=2707.03
## AIC=-5410.07 AICc=-5410.06 BIC=-5399.3
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 2.741485e-08 0.04516154 0.03086297 85.18454 445.309 0.9993754
## ACF1
## Training set -0.3749845
## Random Walk Test on Daily Data from 2020-01-01 onwards
# Fit ARIMA(0,1,0) with drift (a random walk with drift) to the transformed
# series that starts in 2020 and check whether the residuals are uncorrelated.
bit_ts_tran2 %>%
  Arima(order = c(0, 1, 0), include.drift = TRUE) %>%
  checkresiduals()

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0) with drift
## Q* = 32.082, df = 9, p-value = 0.0001927
##
## Model df: 1. Total lags used: 10
# Summarise the model for the series tested above. This call previously fitted
# bit_ret_ts (copy-paste from the return-data section), so the summary did not
# belong to this test.
# NOTE: the recorded summary output that follows was produced from bit_ret_ts
# and must be regenerated.
summary(Arima(bit_ts_tran2, order = c(0, 1, 0), include.drift = TRUE))
## Series: bit_ret_ts
## ARIMA(0,1,0) with drift
##
## Coefficients:
## drift
## 0.0000
## s.e. 0.0011
##
## sigma^2 estimated as 0.002042: log likelihood=2707.03
## AIC=-5410.07 AICc=-5410.06 BIC=-5399.3
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 2.741485e-08 0.04516154 0.03086297 85.18454 445.309 0.9993754
## ACF1
## Training set -0.3749845
## Random Walk Test on Daily Data from 2021-01-01 onwards
# Fit ARIMA(0,1,0) with drift (a random walk with drift) to the transformed
# series that starts in 2021 and check whether the residuals are uncorrelated.
bit_ts_tran3 %>%
  Arima(order = c(0, 1, 0), include.drift = TRUE) %>%
  checkresiduals()

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0) with drift
## Q* = 13.43, df = 9, p-value = 0.1441
##
## Model df: 1. Total lags used: 10
# Summarise the model for the series tested above. This call previously fitted
# bit_ret_ts (copy-paste from the return-data section), so the summary did not
# belong to this test.
# NOTE: the recorded summary output that follows was produced from bit_ret_ts
# and must be regenerated.
summary(Arima(bit_ts_tran3, order = c(0, 1, 0), include.drift = TRUE))
## Series: bit_ret_ts
## ARIMA(0,1,0) with drift
##
## Coefficients:
## drift
## 0.0000
## s.e. 0.0011
##
## sigma^2 estimated as 0.002042: log likelihood=2707.03
## AIC=-5410.07 AICc=-5410.06 BIC=-5399.3
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 2.741485e-08 0.04516154 0.03086297 85.18454 445.309 0.9993754
## ACF1
## Training set -0.3749845
# Check errors
# Transformed Data
# Residuals of the ARIMA(0,1,0)-with-drift fit on the transformed series;
# their standard deviation and mean are printed below.
err_tr <- residuals(
  Arima(bit_ts_tran, order = c(0, 1, 0), include.drift = TRUE)
)
cat("Standard Deviation = ", sd(err_tr))
## Standard Deviation = 0.02554609
cat("Mean =", mean(err_tr))
## Mean = 3.788572e-06
# Inverse Box-Cox transformation.
#
# Arguments:
#   ts_data  Values on the Box-Cox scale: a numeric vector, time series,
#            matrix or all-numeric data frame.
#   lambda   Box-Cox parameter that was used for the forward transform.
#
# Returns the values mapped back to the original scale, in the same shape as
# `ts_data`.
invers_BoxCox <- function(ts_data, lambda) {
  # For lambda == 0 the Box-Cox transform is log(x), so the inverse is exp().
  # The general formula would evaluate 1^(1/0) and return 1 for every input.
  if (isTRUE(lambda == 0)) {
    return(exp(ts_data))
  }
  (ts_data * lambda + 1)^(1 / lambda)
}
# Map the residual standard deviation back through the inverse Box-Cox
# transform, using the lambda estimated from bit_ts.
# NOTE(review): presumably bit_ts is the series bit_ts_tran was derived from --
# confirm that this is the lambda used for the forward transform.
invers_BoxCox(ts_data = sd(err_tr), lambda = BoxCox.lambda(bit_ts))
## [1] 1.025888
# Daily Return Data
# Residuals of the ARIMA(0,1,0)-with-drift fit on the return series; their
# standard deviation and mean are printed below.
err_ret <- residuals(
  Arima(bit_ret_ts, order = c(0, 1, 0), include.drift = TRUE)
)
cat("Standard Deviation = ", sd(err_ret))
## Standard Deviation = 0.04517554
cat("Mean =", mean(err_ret))
## Mean = 2.741485e-08
# NOTE(review): the recorded run warns that Guerrero's lambda selection is
# meant for strictly positive data; confirm that back-transforming the return
# residuals with this lambda is intended.
invers_BoxCox(ts_data = sd(err_ret), lambda = BoxCox.lambda(bit_ret_ts))
## Warning in guerrero(x, lower, upper): Guerrero's method for selecting a Box-Cox
## parameter (lambda) is given for strictly positive data.
## [1] 1.045235
# Forecast with ARIMA
#
# fit_model(): fit an automatically selected ARIMA model to the Box-Cox
# transformed WeightedPrice series and return the forecast on the original
# price scale.
#
# Arguments:
#   bitcoin_data  Data frame with a `Date` column and a `WeightedPrice` column.
#   h             Forecast horizon: the number of days to predict.
#   start_date    First date used for fitting (default 2017-01-01).
#
# Returns a tibble with `h` rows: the point forecast (`mean`), the prediction
# interval bounds (`lower.80.`, `lower.95.`, `upper.80.`, `upper.95.`) and the
# `Date` each forecast refers to.
#
# Relies on invers_BoxCox() defined in this file.
fit_model <- function(bitcoin_data, h, start_date = as.Date("2017-01-01")) {
  stopifnot(is.numeric(h), length(h) == 1L, h >= 1)

  bitcoin_df <- bitcoin_data %>%
    filter(Date >= !!start_date) %>%
    arrange(Date)

  time_series <- bitcoin_df %>%
    select(WeightedPrice) %>%
    ts()

  # Estimate lambda once so the forward and the inverse transform are
  # guaranteed to use the same value.
  lambda <- BoxCox.lambda(time_series)

  predictions <- time_series %>%
    BoxCox(lambda = lambda) %>%
    auto.arima() %>%
    forecast(h = h)

  # Select the forecast components by name rather than by list position.
  forecast_df <- cbind(
    data.frame(predictions["mean"]),
    data.frame(predictions["lower"]),
    data.frame(predictions["upper"])
  )

  # Forecast dates are the h days following the last observation. Shifting the
  # last h observed dates by h (the former approach) is only correct for
  # gap-free daily data with at least h rows.
  last_date <- max(bitcoin_df$Date)

  invers_BoxCox(forecast_df, lambda = lambda) %>%
    mutate(Date = last_date + seq_len(h)) %>%
    as_tibble()
}
# Load the updated BTC price data set from the working directory.
bitcoin_new <- read_csv(file = "btc_base_dataset_NEW.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_double(),
## Date = col_date(format = ""),
## Day_of_Week = col_character()
## )
## i Use `spec()` for the full column specifications.
# Interactive line plot of the weighted price in the updated data set, with
# dollar-formatted labels at selected price levels.
ggplotly(
  ggplot(data = bitcoin_new, mapping = aes(x = Date, y = WeightedPrice)) +
    geom_line(colour = "#ffa500") +
    labs(title = "Bitcoin Weighted Prices 2014 -2021 (new)", x = "") +
    scale_y_continuous(
      breaks = c(0, 5000, 10000, 15000, 30000, 60000),
      labels = c("$0", "$5,000", "$10,000", "$15,000", "$30,000", "$60,000")
    ) +
    my_theme
)
## Predict the BTC Price for the next 30 Days
# Layers, in drawing order:
#   1. point forecast from fit_model() (red line),
#   2. 80% and 95% prediction bands (shaded ribbons),
#   3. the updated data set bitcoin_new (default line colour),
#   4. the data the model was fitted on, from 2015-01-01 onwards (orange).
ggplotly(
  ggplot(fit_model(bitcoin, 30), aes(x = Date, y = mean)) +
    geom_line(colour = "#ff2500") +
    geom_ribbon(
      aes(ymin = lower.80., ymax = upper.80.),
      alpha = 0.3,
      fill = "#ffc04c"
    ) +
    geom_ribbon(
      aes(ymin = lower.95., ymax = upper.95.),
      alpha = 0.3,
      fill = "#ffe4b2"
    ) +
    geom_line(data = bitcoin_new, aes(Date, WeightedPrice)) +
    geom_line(
      data = filter(bitcoin, Date >= as.Date("2015-01-01")),
      aes(Date, WeightedPrice),
      colour = "#ffa500"
    ) +
    my_theme +
    labs(title = "Bitcoin Prediction of 30 Days", y = "Price", x = "") +
    scale_y_continuous(
      breaks = seq(0, 60000, by = 5000),
      labels = c(
        "$0", "$5,000", "$10,000", "$15,000", "$20,000", "$25,000",
        "$30,000", "$35,000", "$40,000", "$45,000", "$50,000", "$55,000",
        "$60,000"
      )
    )
)